Устанавливаем необходимые библиотки
library(tidyverse)
library(DESeq2)
library(pheatmap)
library(RColorBrewer)
library(clusterProfiler)
library(biomaRt)
library(org.Hs.eg.db)
library(EnhancedVolcano)
library(GenomicRanges)
library(msigdbr)
library(multiMiR)
library(miRBaseConverter)
library(enrichplot)
library(vsn)
library(rvest)
library(patchwork)
library(dbplyr)
coldata <- read_tsv("data/phenotable.tsv", show_col_types = FALSE)
rownames(coldata) <- coldata$sample
Warning: Setting row names on a tibble is deprecated.
coldata
counts <- read.csv("data/miR.Counts.csv", header = TRUE, sep = ",")
counts <- column_to_rownames(counts, var = "miRNA")
head(counts)
colnames(counts) <- gsub("^X", "", colnames(counts))
counts_samples <- colnames(counts)
phenotable_samples <- coldata$sample
common_samples <- intersect(counts_samples, phenotable_samples)
counts <- counts[, c(counts$miRNA, common_samples)]
counts <- counts[, rownames(coldata)] #ранжирую по колонки в counts так же как и названия строк в coldata
head(counts)
anno <- read.csv("data/annotation.report.csv", header = TRUE, sep = ",")
anno$Sample.name.s. <- gsub("-", ".", anno$Sample.name.s.)
anno <- anno[, -c(2:5, 7, 15)]
common_samples <- intersect(anno$Sample.name.s., coldata$sample)
anno <- anno[anno$Sample.name.s. %in% common_samples, ]
anno <- anno[match(rownames(coldata), anno$Sample.name.s.), ] #ранжирую по колонки в counts так же как и названия строк в coldata
anno
anno_long <- anno %>%
pivot_longer(cols = -Sample.name.s., names_to = "RNA_Type", values_to = "Count")
plt <- ggplot(anno_long, aes(x = Sample.name.s., y = Count, fill = RNA_Type)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Sample", y = "Read Count") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Set3") # Красивые цвета
print(plt)
ggsave("./pictures_transpl/transpl_barplot_alldataset_no_normalised.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
anno_long <- anno %>%
rowwise() %>%
mutate(across(-Sample.name.s., ~ . / sum(c_across(-Sample.name.s.)))) %>%
ungroup() %>%
pivot_longer(cols = -Sample.name.s., names_to = "RNA_Type", values_to = "Proportion")
plt <- ggplot(anno_long, aes(x = Sample.name.s., y = Proportion, fill = RNA_Type)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Sample", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Set3")
plt
ggsave("./pictures_transpl/transpl_barplot_alldataset_normalised.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
coldata$condition <- relevel(factor(coldata$condition), ref = "no_complications")
modelMatrix <- model.matrix(~condition, coldata)
modelMatrix
(Intercept) conditioncellular conditionhumoral conditionTCAD
1 1 0 0 0
2 1 0 0 0
3 1 0 1 0
4 1 1 0 0
5 1 0 0 1
6 1 0 0 1
7 1 0 0 1
8 1 0 0 0
9 1 0 0 0
10 1 0 0 0
11 1 0 0 0
12 1 0 1 0
13 1 0 1 0
14 1 1 0 0
15 1 1 0 0
16 1 1 0 0
17 1 1 0 0
18 1 1 0 0
19 1 0 0 1
20 1 0 0 1
21 1 0 1 0
22 1 0 0 1
23 1 0 0 1
24 1 0 1 0
25 1 0 1 0
26 1 0 0 0
27 1 0 0 0
28 1 0 1 0
29 1 0 0 0
attr(,"assign")
[1] 0 1 1 1
attr(,"contrasts")
attr(,"contrasts")$condition
[1] "contr.treatment"
qr(modelMatrix)$rank # ранг матрицы
[1] 4
ncol(modelMatrix)
[1] 4
dds <- DESeqDataSetFromMatrix(countData = counts, colData = coldata, design = ~ condition)
converting counts to integer mode
dds$condition <- relevel(dds$condition, ref = "no_complications")
dds
class: DESeqDataSet
dim: 913 29
metadata(1): version
assays(1): counts
rownames(913): Hsa-Let-7-P1a_3p* Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p ... Hsa-Mir-9851_3p Hsa-Mir-9851_5p*
rowData names(0):
colnames(29): 105_S1_R1_001 197_S2_R1_001 ... 127_S29_R1_001 138_S30_R1_001
colData names(2): sample condition
dim(dds)
[1] 913 29
smallestGroupSize <- 15
keep <- rowSums(counts(dds) >= 10) >= smallestGroupSize
dds <- dds[keep,]
dim(dds)
[1] 297 29
dds <- DESeq(dds, fitType = "parametric")
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 31 genes
-- DESeq argument 'minReplicatesForReplace' = 7
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
dds
class: DESeqDataSet
dim: 297 29
metadata(1): version
assays(6): counts mu ... replaceCounts replaceCooks
rownames(297): Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p Hsa-Let-7-P1b_5p ... Hsa-Mir-96-P3_3p* Hsa-Mir-96-P3_5p
rowData names(31): baseMean baseVar ... maxCooks replace
colnames(29): 105_S1_R1_001 197_S2_R1_001 ... 127_S29_R1_001 138_S30_R1_001
colData names(4): sample condition sizeFactor replaceable
plotDispEsts(dds)
raw_counts <- counts(dds, normalized = FALSE)
normalized_counts <- counts(dds, normalized = TRUE)
df <- data.frame(
Sample = rep(colnames(dds), 2),
Counts = c(colSums(raw_counts), colSums(normalized_counts)),
Type = rep(c("Raw", "Normalized"), each = ncol(dds))
)
plt <- ggplot(df, aes(x = Sample, y = Counts, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = "Counts before and after normalization", x = "Sample", y = "Total Counts") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plt
ggsave("./pictures_transpl/transpl_Counts before and after normalization.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
rlog трансформация
rlt <- rlog(dds) #rlog Transformation
meanSdPlot(assay(rlt))
vsd <- varianceStabilizingTransformation(dds, blind=FALSE)
meanSdPlot(assay(vsd)) #показывает, как изменяется стандартное отклонение в зависимости от среднего значения экспрессии
** PCA plot **
pcaData <- plotPCA(rlt, intgroup=c("condition"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
pcaData$sample <- gsub("_.*", "", coldata$sample)
plt <- ggplot(pcaData, aes(PC1, PC2, color = condition)) +
geom_text(aes(label=sample), size=3, vjust=1.5) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
plt
ggsave("./pictures_transpl/transpl_PCA plot.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot a heatmap of 50 most expressed genes Этот heatmap отражает уровни экспрессии генов, а не разницу между группами. Цвета не означают up- или down-регуляцию в сравнении с контрольной группой, потому что heatmap показывает абсолютные значения экспрессии, а не fold change!
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:50]
df <- as.data.frame(colData(dds)$condition)
colnames(df) <- "condition"
rownames(df) <- colnames(counts(dds))
plt <- pheatmap(assay(rlt)[select,],
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
annotation_col = df,
fontsize_row = 6)
plt
ggsave("./pictures_transpl/transpl_Plot a heatmap of 50 most expressed genes.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot of the distance between samples heatmap Расчет расстояний между образцами • Обычно используется евклидово расстояние (по умолчанию в DESeq2). • Оно вычисляется по нормализованным данным экспрессии (rlog() или vst()). • Чем меньше расстояние — тем более похожи образцы.
sampleDists <- dist(t(assay(rlt)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(rlt$condition)
colnames(sampleDistMatrix) <- paste(rlt$condition)
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255)
plt <- pheatmap(sampleDistMatrix,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
color = colors)
plt
ggsave("./pictures_transpl/transpl_Plot of the distance between samples heatmap.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
res_humoral <- results(dds, contrast=c("condition", "no_complications", "humoral"))
res_humoral
log2 fold change (MLE): condition no_complications vs humoral
Wald test p-value: condition no_complications vs humoral
DataFrame with 297 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 72441.408 0.0618842 0.288818 0.214267 0.8303389 0.939409
Hsa-Let-7-P1b_5p 2471.713 -0.9029022 0.495172 -1.823412 0.0682410 0.396094
Hsa-Let-7-P1c_5p 1586.095 -1.0840729 0.471749 -2.297986 0.0215626 0.246039
Hsa-Let-7-P2a1_3p* 106.535 0.0898204 0.728328 0.123324 0.9018504 0.974875
Hsa-Let-7-P2a2_3p* 117.677 -2.1053617 1.182746 -1.780062 0.0750658 0.409032
... ... ... ... ... ... ...
Hsa-Mir-95-P3_5p 11.6942 1.248042 1.169651 1.067020 0.2859626 NA
Hsa-Mir-96-P1_5p 452.3189 1.016073 0.712354 1.426361 0.1537643 0.6037509
Hsa-Mir-96-P2_5p 104065.5500 -0.309282 0.550995 -0.561315 0.5745829 0.8960804
Hsa-Mir-96-P3_3p* 27.6400 0.953532 0.804588 1.185117 0.2359712 0.7233923
Hsa-Mir-96-P3_5p 1433.3806 1.044344 0.364247 2.867132 0.0041421 0.0737293
MA plot Фильтрация точек с низким средним экспрессированием (по baseMean). • Обычно отсекаются baseMean < 1. 2. Определение значимых генов (синие точки): • Используется критерий padj < 0.1 по умолчанию, а не < 0.05!
tiff("./pictures_transpl/transpl_PlotMA_standart_padj_0.05_humoral.tiff",
width = 8, height = 6, units = "in", res = 300, bg = "white")
plotMA(res_humoral, alpha = 0.05, ylim = c(-8, 8))
dev.off()
null device
1
plotMA(res_humoral, alpha = 0.05, ylim = c(-8, 8))
Кастомный MA plot
res_df <- res_humoral %>%
as.data.frame() %>%
mutate(color = case_when(
padj < 0.05 ~ "padj < 0.05",
pvalue < 0.05 ~ "pvalue < 0.05",
TRUE ~ "All"
))
plt <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = color)) +
geom_point(alpha = 0.7, size = 1) +
geom_hline(yintercept = 0, linetype = "solid", color = "gray40", size = 1.5) +
scale_color_manual(values = c("All" = "gray70",
"pvalue < 0.05" = "blue",
"padj < 0.05" = "red")) +
scale_x_log10(labels = scales::scientific) +
theme_minimal() +
labs(x = "mean of normalized counts",
y = "log fold change",
color = NULL)
plt
ggsave("./pictures_transpl/transpl_Сustom MAplot_humoral.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Значимые результаты
58 генов (20%) имеют низкие уровни экспрессии и фильтруются из анализа. independent filtering — процедура, которая исключает гены с низкими значениями для увеличения статистической мощности.
signres_humoral <- results(dds, contrast=c("condition", "no_complications", "humoral"), alpha=0.05)
summary(signres_humoral)
out of 297 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 9, 3%
outliers [1] : 1, 0.34%
low counts [2] : 58, 20%
(mean count < 40)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
Let’s arranged it by log2FoldChange:
order_indices <- order(-res_humoral$log2FoldChange)
res_humoral[order_indices, ]
log2 fold change (MLE): condition no_complications vs humoral
Wald test p-value: condition no_complications vs humoral
DataFrame with 297 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Mir-542_3p 179.1170 1.55776 0.661653 2.35434 0.0185557 0.225199
Hsa-Mir-190-P1_5p 19.6983 1.54017 1.120054 1.37509 0.1691041 NA
Hsa-Mir-874_3p 183.9741 1.53509 0.798423 1.92265 0.0545243 0.378167
Hsa-Mir-624_5p 18.9670 1.36865 1.076852 1.27097 0.2037388 NA
Hsa-Mir-95-P3_5p 11.6942 1.24804 1.169651 1.06702 0.2859626 NA
... ... ... ... ... ... ...
Hsa-Mir-873_3p 102.3872 -2.74537 1.229274 -2.23333 2.55274e-02 0.27263230
Hsa-Mir-10-P2a_5p 1162.4941 -2.85595 0.650038 -4.39350 1.11539e-05 0.00148904
Hsa-Mir-483_5p 144.9177 -3.86731 1.288810 -3.00068 2.69377e-03 0.06522233
Hsa-Mir-193-P1b_5p* 152.0458 -4.84885 0.982406 -4.93569 7.98689e-07 0.00021325
Hsa-Mir-193-P1b_3p 99.8971 -5.64893 1.644846 -3.43432 5.94044e-04 0.02643497
Visualisation for the first gene
#plotCounts(dds, gene=which.max(res_humoral$log2FoldChange), intgroup="condition")
plotCounts(dds, gene=which.min(res_humoral$padj), intgroup="condition")
#plotCounts(dds, gene=rownames(res)[which.min(res$padj[which.max(res$log2FoldChange)])], intgroup="condition")
Volcano plot
plt <- EnhancedVolcano(res_humoral,
lab = rownames(res_humoral),
x = "log2FoldChange",
y = "padj",
pCutoff = 0.05,
FCcutoff = 1,
labSize = 3.0,
boxedLabels = FALSE,
col = c('black', '#CBD5E8', '#B3E2CD', '#FDCDAC'),
colAlpha = 1,
title = NULL,
subtitle = NULL)
plt
ggsave("./pictures_transpl/transpl_VolcanoPlot_humoral.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
coldata_filtered <- coldata[coldata$condition %in% c("humoral", "no_complications"), ]
coldata_filtered
Plot a heatmap of diff expressed genes
res_sign_humoral <- subset(res_humoral, padj < 0.05 & !is.na(padj) & abs(log2FoldChange) > 1.0)
res_sign_humoral <- res_sign_humoral[order(res_sign_humoral$log2FoldChange, decreasing = TRUE), ]
sig_genes <- rownames(res_sign_humoral)
de_mat <- assay(rlt)[sig_genes, ]
de_mat_filtered <- de_mat[, coldata_filtered$sample]
#datamatrix <- t(scale(t(de_mat_filtered)))
datamatrix <- de_mat_filtered
annotation_col <- data.frame(condition = coldata_filtered$condition)
rownames(annotation_col) <- colnames(datamatrix)
annotation_colors <- list(
condition = c("no_complications" = "#FFCC00", "humoral" = "#3399FF"))
plt <- pheatmap(datamatrix,
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = annotation_colors,
display_numbers = FALSE,
legend = TRUE,
fontsize = 15)
ggsave("./pictures_transpl/transpl_Heatmap of diff expressed genes_humoral.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
up_humoral <- res_sign_humoral %>%
as.data.frame() %>%
filter(log2FoldChange > 0)
down_humoral <- res_sign_humoral %>%
as.data.frame() %>%
filter(log2FoldChange < 0)
rownames(up_humoral)
character(0)
rownames(down_humoral)
[1] "Hsa-Mir-146-P1_5p" "Hsa-Mir-425_3p*" "Hsa-Mir-148-P2_3p" "Hsa-Mir-10-P3c_5p" "Hsa-Mir-10-P2a_5p"
[6] "Hsa-Mir-193-P1b_5p*" "Hsa-Mir-193-P1b_3p"
Переводим в miRBase • miRBase: https://www.mirbase.org/ • MirGeneDB: https://mirgenedb.org/
url <- "https://mirgenedb.org/browse/hsa"
page <- read_html(url)
Парсим таблицу
mir_table <- page %>%
html_element("table") %>%
html_table(fill = TRUE)
mir_table <- mir_table[-c(1:3), c(1,2) ]
colnames(mir_table) <- c("MirGeneDB_ID", "MiRBase_ID")
mir_table$MirGeneDB_ID <- sub(" V", "", mir_table$MirGeneDB_ID)
head(mir_table)
down_humoral_clean <- sub("_.*", "", row.names(down_humoral))
down_humoral_converted <- mir_table$MiRBase_ID[match(down_humoral_clean, mir_table$MirGeneDB_ID)]
down_humoral_converted
[1] "hsa-mir-146b" "hsa-mir-425" NA "hsa-mir-125b-2" NA "hsa-mir-193a"
[7] "hsa-mir-193a"
Конвертация в MIMATID NA без соответствия удалила из анализа
NA Hsa-Mir-148-P2_3p есть три похожих соответствия: Hsa-Mir-148-P1
hsa-mir-148a
Hsa-Mir-148-P3 hsa-mir-152
Hsa-Mir-148-P4 hsa-mir-148b
NA Hsa-Mir-10-P2a_5p есть три похожих соответствие: Hsa-Mir-10-P2b
hsa-mir-99b
Hsa-Mir-10-P2c hsa-mir-99a
Hsa-Mir-10-P2d hsa-mir-100
[1] “Hsa-Mir-146-P1_5p” “Hsa-Mir-425_3p” ”Hsa-Mir-148-P2_3p”
”Hsa-Mir-10-P3c_5p” ”Hsa-Mir-10-P2a_5p”
[6] ”Hsa-Mir-193-P1b_5p” “Hsa-Mir-193-P1b_3p”
mirna_names_down <- c("hsa-miR-146b-5p", "hsa-miR-425-3p", "hsa-miR-125b-5p", "hsa-miR-193a-5p", "hsa-miR-193a-3p")
converted_mirna_down <- miRNAVersionConvert(mirna_names_down)
converted_mirna_down
Запрос таргетов из базы multiMiR
targets_humoral_down <- unique(get_multimir(org = "hsa", mirna = converted_mirna_down$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
#writeLines(targets_down, "targets_down150_list.txt")
Анализ обогащения из базы биологических процессов
#msig_go_bp <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "GO:BP")
# targets_down <- readLines("targets_down150_list.txt")
# targets_up <- readLines("targets_up150_list.txt")
GO_enrich_down_humoral_bp <- enrichGO(
gene = targets_humoral_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
Визуализация
p1 <- dotplot(GO_enrich_down_humoral_bp, showCategory = 20) +
ggtitle("GO Enrichment for DOWNregulated targets") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 11)
)
p1
ggsave("./pictures_transpl/transpl_GO_enrichment_dotplot_down_humoral_bp.tiff", plot = p1, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN_humoral_BP <- enrichplot::pairwise_termsim(GO_enrich_down_humoral_bp, method = "JC")
plt <- emapplot(GO_enrich_DOWN_humoral_BP,
repel = TRUE,
showCategory = 20) +
ggtitle("Biological processes for DOWNregulated targets for humoral") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures_transpl/transpl_GO_enrichment_emapplot_DOWN_humoral_BP.tiff", plot = plt, width = 16, height = 10, dpi = 300)
##Клеточное отторжение
res_cellular <- results(dds, contrast=c("condition", "no_complications", "cellular"))
res_cellular
log2 fold change (MLE): condition no_complications vs cellular
Wald test p-value: condition no_complications vs cellular
DataFrame with 297 rows and 6 columns
baseMean log2FoldChange lfcSE
<numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 72441.408 0.154411 0.302046
Hsa-Let-7-P1b_5p 2471.713 -0.210240 0.517815
Hsa-Let-7-P1c_5p 1586.095 0.143934 0.493386
Hsa-Let-7-P2a1_3p* 106.535 -0.261443 0.759181
Hsa-Let-7-P2a2_3p* 117.677 -0.228063 1.237539
... ... ... ...
Hsa-Mir-95-P3_5p 11.6942 -0.95512860 1.183561
Hsa-Mir-96-P1_5p 452.3189 0.23819994 0.743879
Hsa-Mir-96-P2_5p 104065.5500 0.40633653 0.576244
Hsa-Mir-96-P3_3p* 27.6400 -0.00541547 0.825501
Hsa-Mir-96-P3_5p 1433.3806 0.62651744 0.380360
stat pvalue padj
<numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 0.511217 0.609199 0.999634
Hsa-Let-7-P1b_5p -0.406015 0.684732 0.999634
Hsa-Let-7-P1c_5p 0.291728 0.770495 0.999634
Hsa-Let-7-P2a1_3p* -0.344375 0.730564 0.999634
Hsa-Let-7-P2a2_3p* -0.184288 0.853788 0.999634
... ... ... ...
Hsa-Mir-95-P3_5p -0.80699552 0.4196691 0.999634
Hsa-Mir-96-P1_5p 0.32021308 0.7488068 0.999634
Hsa-Mir-96-P2_5p 0.70514677 0.4807189 0.999634
Hsa-Mir-96-P3_3p* -0.00656022 0.9947657 0.999634
Hsa-Mir-96-P3_5p 1.64717130 0.0995228 0.999634
MA plot
tiff("./pictures_transpl/transpl_PlotMA_standart_padj_0.05_cellular.tiff",
width = 8, height = 6, units = "in", res = 300, bg = "white")
plotMA(res_cellular, alpha = 0.05, ylim = c(-8, 8))
dev.off()
null device
1
plotMA(res_cellular, alpha = 0.05, ylim = c(-8, 8))
Кастомный MA plot
res_df <- res_cellular %>%
as.data.frame() %>%
mutate(color = case_when(
padj < 0.05 ~ "padj < 0.05",
pvalue < 0.05 ~ "pvalue < 0.05",
TRUE ~ "All"
))
plt <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = color)) +
geom_point(alpha = 0.7, size = 1) +
geom_hline(yintercept = 0, linetype = "solid", color = "gray40", size = 1.5) +
scale_color_manual(values = c("All" = "gray70",
"pvalue < 0.05" = "blue",
"padj < 0.05" = "red")) +
scale_x_log10(labels = scales::scientific) +
theme_minimal() +
labs(x = "mean of normalized counts",
y = "log fold change",
color = NULL)
plt
ggsave("./pictures_transpl/transpl_Сustom MAplot_cellular.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
БКAПC
res_TCAD <- results(dds, contrast=c("condition", "no_complications", "TCAD"))
res_TCAD
log2 fold change (MLE): condition no_complications vs TCAD
Wald test p-value: condition no_complications vs TCAD
DataFrame with 297 rows and 6 columns
baseMean log2FoldChange lfcSE
<numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 72441.408 0.1355361 0.288810
Hsa-Let-7-P1b_5p 2471.713 -0.3022559 0.495113
Hsa-Let-7-P1c_5p 1586.095 0.0210025 0.471732
Hsa-Let-7-P2a1_3p* 106.535 0.6849887 0.727057
Hsa-Let-7-P2a2_3p* 117.677 0.7040008 1.184495
... ... ... ...
Hsa-Mir-95-P3_5p 11.6942 1.118711 1.146318
Hsa-Mir-96-P1_5p 452.3189 0.410861 0.711305
Hsa-Mir-96-P2_5p 104065.5500 0.865642 0.550995
Hsa-Mir-96-P3_3p* 27.6400 0.146171 0.789581
Hsa-Mir-96-P3_5p 1433.3806 0.517593 0.363648
stat pvalue padj
<numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 0.4692911 0.638862 0.999465
Hsa-Let-7-P1b_5p -0.6104787 0.541545 0.999465
Hsa-Let-7-P1c_5p 0.0445222 0.964488 0.999465
Hsa-Let-7-P2a1_3p* 0.9421391 0.346121 0.999465
Hsa-Let-7-P2a2_3p* 0.5943468 0.552280 0.999465
... ... ... ...
Hsa-Mir-95-P3_5p 0.975917 0.329105 0.999465
Hsa-Mir-96-P1_5p 0.577616 0.563523 0.999465
Hsa-Mir-96-P2_5p 1.571051 0.116171 0.999465
Hsa-Mir-96-P3_3p* 0.185125 0.853131 0.999465
Hsa-Mir-96-P3_5p 1.423334 0.154639 0.999465
MA plot
tiff("./pictures_transpl/transpl_PlotMA_standart_padj_0.05_TCAD.tiff",
width = 8, height = 6, units = "in", res = 300, bg = "white")
plotMA(res_TCAD, alpha = 0.05, ylim = c(-8, 8))
dev.off()
null device
1
plotMA(res_TCAD, alpha = 0.05, ylim = c(-8, 8))
res_df <- res_TCAD %>%
as.data.frame() %>%
mutate(color = case_when(
padj < 0.05 ~ "padj < 0.05",
pvalue < 0.05 ~ "pvalue < 0.05",
TRUE ~ "All"
))
plt <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = color)) +
geom_point(alpha = 0.7, size = 1) +
geom_hline(yintercept = 0, linetype = "solid", color = "gray40", size = 1.5) +
scale_color_manual(values = c("All" = "gray70",
"pvalue < 0.05" = "blue",
"padj < 0.05" = "red")) +
scale_x_log10(labels = scales::scientific) +
theme_minimal() +
labs(x = "mean of normalized counts",
y = "log fold change",
color = NULL)
plt
ggsave("./pictures_transpl/transpl_Сustom_MAplot_TCAD.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Значимые результаты
signres_TCAD <- results(dds, contrast=c("condition", "no_complications", "TCAD"), alpha=0.05)
summary(signres_TCAD)
out of 297 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 1, 0.34%
LFC < 0 (down) : 0, 0%
outliers [1] : 1, 0.34%
low counts [2] : 0, 0%
(mean count < 8)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
Volcano plot
plt <- EnhancedVolcano(res_TCAD,
lab = rownames(res_TCAD),
x = "log2FoldChange",
y = "padj",
pCutoff = 0.05,
FCcutoff = 1,
labSize = 3.0,
boxedLabels = FALSE,
col = c('black', '#CBD5E8', '#B3E2CD', '#FDCDAC'),
colAlpha = 1,
title = NULL,
subtitle = NULL)
plt
ggsave("./pictures_transpl/transpl_VolcanoPlot_TCAD.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Анализ обогащения TCAD
res_sign_TCAD <- subset(res_TCAD, padj < 0.05 & !is.na(padj) & abs(log2FoldChange) > 1.0)
up_TCAD <- res_sign_TCAD %>%
as.data.frame() %>%
filter(log2FoldChange > 0)
rownames(up_TCAD)
[1] "Hsa-Mir-582_3p"
mirna_names_up <- "hsa-miR-582-3p"
converted_mirna_up <- miRNAVersionConvert(mirna_names_up)
converted_mirna_up
Запрос таргетов из базы multiMi
targets_TCAD_up <- unique(get_multimir(org = "hsa", mirna = converted_mirna_down$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
targets_TCAD_up
[1] "ARF3" "CPNE3" "B4GALT1" "ALDH9A1" "RASA1" "AXIN2"
[7] "ENTPD4" "AMD1" "CEACAM5" "CHRM2" "EIF4EBP2" "GRM5"
[13] "KCNC1" "MPP2" "MYO1C" "PGGT1B" "RAB27A" "RREB1"
[19] "SCN4B" "SFRP1" "SOX9" "STC2" "TNIP1" "MED13L"
[25] "SLC25A37" "GNL3L" "WBP1L" "PTCD3" "RBM28" "CEP55"
[31] "PRPF38A" "DIXDC1" "LRRK2" "GRPEL2" "DUSP18" "SLC16A9"
[37] "ARL10" "NFS1" "GRM6" "DKK3" "SMIM15" "MYL12B"
[43] "ZNF716" "MAP4K4" "CXCL14" "CCS" "KRR1" "NCLN"
[49] "SSX5" "ARHGEF39" "C10orf67" "FER" "FSD2" "GAN"
[55] "SURF4" "INO80D" "SELENOI" "NOL4L" "LENG9" "RTL8C"
[61] "ESS2" "FHIP2A" "PTEN" "MAPK14" "SMC1A" "BCL2"
[67] "NFIA" "NCOA3" "ERBB2" "NRAS" "PLAG1" "LAMC1"
[73] "ZFP36L2" "ERH" "UHMK1" "MMD" "ANKIB1" "DDX5"
[79] "RABGAP1L" "RNF138" "TPM4" "OAT" "G3BP1" "NCOA2"
[85] "API5" "TJP1" "CAMTA1" "CD44" "HIPK3" "BMPR2"
[91] "SP3" "HMGA2" "SERBP1" "HADHB" "ABHD5" "SWAP70"
[97] "QSER1" "PTBP1" "BAZ1A" "CD83" "CMTM4" "HERPUD1"
[103] "ASXL2" "PWWP2A" "MSH2" "PDCD6IP" "PHKB" "PNN"
[109] "NOTCH2" "RHOA" "BCL2L11" "MAPK9" "MYB" "HIF1A"
[115] "CRKL" "KIF1A" "ITCH" "CCND2" "TRPS1" "MBNL2"
[121] "CAPRIN1" "CDH11" "FBN2" "YAP1" "ELK3" "ROCK2"
[127] "AGO2" "AGO3" "CCNA2" "NR4A2" "DNMT1" "EIF4G2"
[133] "SMAD2" "FBXO28" "ST14" "RDX" "TNPO1" "ZFP91"
[139] "ADAM17" "UBAP2" "ATM" "WNK1" "RHOBTB1" "ELAVL1"
[145] "NOG" "CTCF" "ZBTB20" "ABL2" "ACADVL" "ACTN4"
[151] "ACTN1" "PARP1" "AP2A2" "AP1G1" "AGL" "ALCAM"
[157] "ALDH3A2" "ALPI" "SLC25A6" "BIRC3" "ARHGAP5" "ASPH"
[163] "ZFHX3" "KLF5" "DDR1" "CACNA2D1" "CALM1" "CAMK4"
[169] "CAPZA2" "CCNB1" "CD47" "CDC5L" "CFL1" "CHD1"
[175] "LYST" "CLCN5" "TPP1" "CLTC" "COPA" "SLC31A1"
[181] "CPT1A" "CREBBP" "CSNK1G3" "CTNNB1" "CYP24A1" "CYP51A1"
[187] "DAB2" "DDB1" "GADD45A" "DMXL1" "TIMM8A" "DYNC1H1"
[193] "DR1" "DSG2" "DUSP1" "EEF2" "EGR1" "EIF2S3"
[199] "EIF4B" "EIF5" "EP300" "BPTF" "FGF2" "FLOT2"
[205] "FN1" "FUT8" "GFPT1" "GFRA1" "GLG1" "GLO1"
[211] "GNB1" "GOLGA3" "GOLGB1" "GPD2" "GRIN2A" "GSTP1"
[217] "HCCS" "HTT" "HIVEP2" "FOXN2" "IGF1R" "IGFBP4"
[223] "INHBB" "JUP" "KCNN2" "KPNA2" "LGALS3BP" "SMAD3"
[229] "MAPT" "MAT2A" "MBNL1" "MCM3" "MCM6" "MCM7"
[235] "KMT2A" "NEO1" "NFATC3" "NFE2L2" "NUP98" "OPA1"
[241] "ORC2" "PRDX1" "PFKFB2" "PLAGL2" "PLEC" "PLOD2"
[247] "PLSCR1" "PPP1CB" "PPP1CC" "PPP2R1A" "PKN1" "MAPK6"
[253] "MAPK8" "EIF2AK2" "PTPN9" "PURA" "RAD23B" "RASA2"
[259] "RCN1" "RECQL" "REV3L" "RGS16" "ROBO2" "RPL6"
[265] "CLIP1" "RSU1" "RXRB" "CXCL5" "SRSF2" "TRA2B"
[271] "SLC1A5" "SLC5A3" "SLC20A1" "SMARCB1" "SON" "SSR1"
[277] "STRN" "STXBP2" "TBL1X" "TCF12" "PPP1R11" "TMBIM6"
[283] "NR2F2" "TFRC" "TMF1" "TMPO" "TOP1" "NR2C2"
[289] "TRIO" "TXNRD1" "UBE2D2" "UBE2D3" "UBE2G1" "UBE2H"
[295] "UBE2N" "UBP1" "UGCG" "UCK2" "VDAC2" "VEGFC"
[301] "WNT7B" "SF1" "ZNF711" "ZMYM2" "ZNF207" "SLC30A1"
[307] "LUZP1" "SLBP" "REEP5" "PRRC2A" "DDX39B" "KAT6A"
[313] "SLC25A16" "SHOC2" "PTP4A2" "SLC7A5" "NRIP1" "USP9X"
[319] "ARID1A" "TRRAP" "EEA1" "CUL4B" "PKP4" "ENC1"
[325] "COPS3" "CASK" "FAM193A" "CDK13" "RTCA" "EIF3G"
[331] "EIF3I" "EIF3J" "STX16" "BECN1" "SRSF9" "ASAP2"
[337] "ALDH1A2" "FUBP3" "USP10" "MTMR4" "SMC3" "SCAF11"
[343] "BUB3" "DDX21" "CCPG1" "SRSF11" "NREP" "TRIP12"
[349] "GTF3C5" "GTF3C4" "GTF3C3" "EFTUD2" "LIPG" "QKI"
[355] "BAG4" "RBM39" "NUP155" "RAPGEF2" "USP6NL" "DOCK4"
[361] "USP34" "LAPTM4A" "MLEC" "DHX38" "CKAP5" "TSC22D2"
[367] "MELK" "AREL1" "FCHSD2" "TOX4" "SMG7" "LPGAT1"
[373] "AMMECR1" "MED13" "NUP153" "PTBP3" "ACOT8" "PDCD6"
[379] "UBA2" "HUWE1" "LRPPRC" "RBM12" "ABI2" "LHFPL2"
[385] "ALG3" "DDX39A" "IKZF1" "CITED2" "TUBA1B" "VTI1B"
[391] "SYNCRIP" "UNC13B" "DDX17" "IPO7" "ANP32B" "ARL6IP5"
[397] "AGPAT1" "CCT2" "MTHFS" "IVNS1ABP" "EXOC5" "CELF2"
[403] "NFAT5" "STAG2" "AHCYL1" "ARPP21" "WDR3" "BRD8"
[409] "PAPOLA" "DBF4" "KDELR1" "YWHAQ" "COPS5" "METAP2"
[415] "ATF7" "CPSF6" "CDC37" "LSM6" "XPOT" "ATXN2L"
[421] "RRAS2" "XRN2" "PPM1E" "FNDC3A" "ATG14" "TMCC1"
[427] "XPO7" "FNBP1" "SMG1" "NCOA6" "ZNF609" "PPRC1"
[433] "TTLL5" "TNRC6B" "POGZ" "WDR43" "FAM120A" "FAF2"
[439] "PSME4" "ATP11B" "SYNE2" "RPRD2" "DDHD2" "MGA"
[445] "NUP160" "WWC1" "BICD2" "DMXL2" "SMCHD1" "LARP1"
[451] "ZDHHC17" "ADNP" "RYBP" "CBX6" "PHF3" "PES1"
[457] "MACF1" "LRRC8B" "SRRM2" "ZNF281" "LEMD3" "ORC3"
[463] "CD2AP" "ABTB2" "DCAF12" "RNF19A" "NOL11" "ZNF385A"
[469] "GORASP2" "GLCE" "ANKRD17" "APPL1" "HERC4" "MYOF"
[475] "OSTF1" "RPS6KC1" "AP3M1" "RNF11" "RRP7A" "SLCO4A1"
[481] "BZW2" "REPIN1" "GRHL1" "RBM15B" "SNX12" "NRBP1"
[487] "UBQLN2" "BAZ2B" "ITSN2" "ASAP1" "CDON" "GLRX2"
[493] "DYNC1LI1" "CERCAM" "NAGPA" "TUBD1" "LEF1" "SPTBN5"
[499] "UBR5" "CTDSPL2" "TRIM33" "ZFR" "PCBP3" "SLC38A2"
[505] "DNAJC10" "DDX56" "PAF1" "KLHL24" "SWT1" "VPS13C"
[511] "SYTL2" "TMEM248" "VPS13D" "PBRM1" "ZNF532" "SETD5"
[517] "BBS7" "PHF10" "QRSL1" "LRRC36" "SLC39A9" "LGR4"
[523] "ETNK1" "CHD7" "SLC30A6" "N4BP2" "VPS35" "ENAH"
[529] "CCAR1" "TMEM30A" "TRERF1" "ASH1L" "KMT2E" "RNF114"
[535] "PDGFC" "UBFD1" "JPH1" "CMC2" "BBX" "PHTF2"
[541] "PELI1" "ZMIZ1" "BIRC6" "KIAA1143" "SCAF4" "HEG1"
[547] "NLGN4X" "TAOK1" "ARHGAP21" "WDFY1" "EP400" "EPB41L5"
[553] "KMT2C" "AASDHPPT" "ABHD4" "GOLPH3" "LMBR1" "ZMAT3"
[559] "NUCKS1" "RMND5A" "REEP1" "MARCKSL1" "UBE2Z" "AHNAK"
[565] "KCTD15" "TBL1XR1" "SLTM" "MCMBP" "PHC3" "NAA25"
[571] "CHD9" "PAAF1" "WDR26" "KLHL15" "TNKS2" "DDHD1"
[577] "DUSP16" "APOLD1" "C6orf62" "SLIRP" "AIF1L" "AMMECR1L"
[583] "STK40" "ZNRF3" "ZNF644" "ARID5B" "ANTXR1" "PHF6"
[589] "MCM8" "GLYR1" "UNC119B" "FUT10" "ZC3H12C" "R3HDM4"
[595] "TMEM259" "LONRF1" "MTDH" "MOB1B" "FAM114A1" "ZNF618"
[601] "MALSU1" "FOXP4" "AGAP1" "FAT3" "LRIG3" "NEDD1"
[607] "IKBIP" "ARL8A" "NUP35" "FAM168B" "DCBLD2" "LIN54"
[613] "PACRGL" "STXBP5" "BRI3BP" "ANKRD46" "FAM91A1" "USP54"
[619] "TMTC2" "PPTC7" "DGKH" "SPRED1" "TOR1AIP2" "MIER3"
[625] "TMEM64" "ZNF384" "SPRED2" "TUBB" "HIPK1" "MRPL21"
[631] "PRR14L" "STEAP2" "GAS2L3" "SREK1IP1" "LURAP1L" "TTC6"
[637] "SPOPL" "IRF2BP2" "NHLRC2" "MAST4" "SNX19" "ZBTB34"
[643] "CCDC88C" "GPX8" "BRD2" "CCNB2" "DBNDD2" "ADRB2"
[649] "PTCH1" "RPL37" "GGA1" "TCP1" "CENPW" "GOSR2"
[655] "RPS20" "CELF1" "PARP2" "RBM23" "FAM222B" "IKZF2"
[661] "STRN3" "PCBP2" "RAP1GDS1" "RC3H2" "SIKE1" "LDB1"
[667] "TENM2" "MICAL3" "CNBP" "GPBP1" "NUDCD1" "TBCEL"
[673] "JDP2" "ARHGAP26" "NRXN1" "ZBTB43" "PUF60" "ARFGAP3"
[679] "PIEZO1" "CDYL" "AMFR" "TMX2" "SDK2" "SETDB1"
[685] "SAP130" "PTGR1" "EWSR1" "AMACR" "CADM2" "STIM2"
[691] "CAMTA2" "FOXP2" "TTC27" "DCLK1" "VAPB" "OXR1"
[697] "HMGN3" "ZNF664" "SLC19A1" "ZFAND6" "KIAA0586" "SLC30A5"
[703] "CTNND2" "DHX9" "COX7A2" "ELK4" "KPNB1" "PPP2CA"
[709] "PSMB2" "PSMD13" "SH3GL3" "TFAM" "UBA1" "SMARCA5"
[715] "PPFIA1" "HERC1" "JMJD1C" "ASCL1" "PIWIL1" "ZFYVE9"
[721] "TM9SF2" "ATG5" "AQP3" "DDX1" "RAC3" "THRAP3"
[727] "GNL1" "HLA-B" "TCF20" "ARPC2" "ARIH1" "NUTF2"
[733] "SF3B4" "PRCC" "OLFM1" "CCT4" "POLR3C" "LGALS8"
[739] "CGRRF1" "SRCAP" "LMO4" "ZMYM6" "TPX2" "KIAA1549L"
[745] "ZMYND8" "SNRNP200" "SETD2" "CRIPT" "CNOT3" "UBXN4"
[751] "AQR" "CEP104" "SART3" "RNF10" "TRAPPC8" "KLHDC10"
[757] "TRAK2" "R3HDM1" "PTPN23" "AAR2" "ADIPOR1" "OTUD6B"
[763] "DESI2" "COPS4" "TRAPPC4" "TAOK3" "CNOT1" "DDX47"
[769] "RBM38" "SCYL2" "YEATS2" "SAMD4B" "AKIRIN2" "ATF7IP"
[775] "TMEM39A" "TMEM242" "MYO5C" "YLPM1" "GPCPD1" "WDR45B"
[781] "NMT1" "ZNF462" "ZNF277" "ZNF335" "ZNF106" "INTS3"
[787] "ZNF649" "IRX3" "ZMIZ2" "HPS3" "MPV17L2" "CSRNP1"
[793] "SMDT1" "CREB3L1" "ESCO1" "C1orf131" "ARHGAP42" "ZNF362"
[799] "RNF168" "YTHDF3" "C2orf69" "CCDC117" "PRTG" "COX18"
[805] "PLD6" "ZNF714" "UBALD2" "CARM1" "COX4I1" "NBEAL2"
[811] "METTL22" "FBL" "LEO1" "REC8" "OR4K13" "MT-ATP6"
[817] "RPS21" "TUG1" "ZBED6" "CNIH1" "MIEF1" "DYNLL2"
[823] "ERBIN" "ADGRG2" "MAP1LC3B2" "TUBGCP5" "PPP4R3B" "RIC1"
[829] "BORCS7" "SRPRA" "SLF2" "CCL3" "ADGRL2" "ERO1A"
[835] "KMT2B" "SUSD6" "PCNX1" "SELENOT" "EMSY" "LNPK"
[841] "MFSD14B" "ZCCHC3" "NSD2" "MINDY2" "WASHC4" "NSD3"
[847] "SEPTIN2" "SEPTIN7" "SHISAL1" "RBIS" "HARS1" "MACO1"
[853] "GARS1" "SEPTIN10" "ATP5PB" "VIRMA" "TENT5C" "CRACD"
[859] "BLTP2" "ZNF275" "CYB5R4" "BLTP1" "MCL1" "ATP5MK"
[865] "HAPSTR1" "CRACDL" "FEZ1" "U2AF1" "MCTS2"
Анализ обогащения из базы биологических процессов
GO_enrich_up_TCAD_bp <- enrichGO(
gene = targets_TCAD_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
Визуализация
p1 <- dotplot(GO_enrich_up_TCAD_bp, showCategory = 20) +
ggtitle("GO Enrichment for UPregulated targets") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 11)
)
p1
ggsave("./pictures_transpl/transpl_GO_enrichment_dotplot_up_TCAD_bp.tiff", plot = p1, width = 16, height = 10, dpi = 300)
GO_enrich_UP_TCAD_bp <- enrichplot::pairwise_termsim(GO_enrich_up_TCAD_bp, method = "JC")
plt <- emapplot(GO_enrich_UP_TCAD_bp,
repel = TRUE,
showCategory = 20) +
ggtitle("Biological processes for UPregulated targets for TCAD") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures_transpl/transpl_GO_enrichment_emapplot_up_TCAD_BP.tiff", plot = plt, width = 16, height = 10, dpi = 300)